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#1 


Use ‘regexp_like’ to replace ‘LIKE’ clauses 


oS A 


SELEG SELEG 
FROM FROM 
tablel tablel 
WHERE WHERE 
lower(item_name) LIKE ‘%samsung%' OR REGEXP_LIKE(lower(item_name), 


lower(item_name) LIKE '%xiaomi%' OR "samsung | xiaomi | iphone | huawei') 
lower(item name) LIKE '%iphone%' OR 
lower(item name) LIKE ‘%huawei%' 


--and so on 


#2 


Use ‘regexp_extract’ to replace ‘Case-when Like’ 


eS 


SELECT 

CASE 
WHEN concat(' ',item name,' ') LIKE 'XacerX' then ‘Acer’ 
WHEN concat(' ',item name," ') LIKE '%advance%' then 'Advance' 
WHEN concat(' ',item name,' ') LIKE ‘%alfalink%' then 'Alfalink' 


AS brand 
FROM item list 


SELECT 

regexp_extract(item_name, '(asus|lenovo|hp|acer|dell|zyrex|...)') 
AS brand 
FROM item_list 


#3 


Convert long list of IN clause into a temporary table 


a 


SELECT = SELECT © 
FROM Tablei as t1 FROM Tablel as t1 
WHERE JOIN ( 
itemid in (3363134, SELECT 
5189076, .., 4062349) itemid 
FROM ( 
SELECT 


split('3363134, 5189076, ..,' 
as bar 


) 
CROSS JOIN 
UNNEST (bar) AS t(itemid) 
) AS Table2 as t2 
ON 
t1.itemid = t2.itemid 


Hl, 


Always order your JOINs from largest tables to 
smallest tables 


A 


SELECT SELECT 
fs * 
FROM FROM 
small table large table 


JOIN JOIN 
large table small table 
ON small table.id - large table.id ON small table.id - large table.id 


#5 
Use simple equi-joins 


Two tables with date string e.g., ‘2020-09-01’, but one of the tables only has columns for year, month, 
day values 


~ A 


SELECT st SELECT * 
FROM FROM 
table1 a table1 a 
JOIN JOIN ( 
table2 b select 
ON a.date = CONCAT(b.year, '-' name, CONCAT(b.year, ‘-', b.month, '-', b.day) as date 


b.month, '-', b.day) from 
table2 b 
) new 
ON a.date = new.date 


#6 
Always "GROUP BY" by the attribute/column with the 
largest number of unique entities/values 


LS A 


select select 
main_category, main_category, 
sub_category, sub_category, 
itemid, itemid, 
sum(price) sum(price) 


from from 
table1 table1 
group by group by 
main_category, sub_category, itemid itemid, sub_category, main_category 


#7 


Avoid subqueries in WHERE clause 


os < 


select with t2 as ( 
sum(price) select itemid 
from from table2 


table1 ) 
where 
itemid in ( select 


select itemid sum(price) 
from table2 from 
tablel as t1 
join 
t2 
on t1.itemid = t2.itemid 


#8 


Use Max instead of Rank 


— < 


SELECT * SELECT userid, max(prdate) 
from ( from table1 
select group by 1 
userid, 


rank() over (order by prdate desc) as rank 
from table1 


) 


where ranking = 1 


#9 
Other Tips 


Use approx_distinct() instead of count(distinct) for very large 
datasets 

Use approx_percentile(metric, 0.5) for median 

Avoid UNIONs where possible 

Use WITH statements vs. nested subqueries 


